#!/bin/bash
# Current file: //lib64/nagios/plugins/check_io
# WebSouls Technical Department
# ▄▄▄▄▄▄ ▄▄ ▄▄▄▄ ▄▄▄▄ ▄▄▄▄▄▄▄▄ ▄▄ ▄▄▄▄▄▄▄▄
# ▀▀██▀▀ ██ ██▀▀██ ▄█▀▀▀▀█ ▀▀▀██▀▀▀ ████ ▀▀▀██▀▀▀
# ██ ██ ██ ██ ██▄ ██ ████ ██
# ██ ██ ██ ██ ▀████▄ ██ ██ ██ ██
# ██ ▄█▀ ██ ██ ▀██ ██ ██████ ██
# ▄▄██▄▄ ▄█▀ ██▄▄██ █▄▄▄▄▄█▀ ██ ▄██ ██▄ ██
# ▀▀▀▀▀▀ ▄█▀ ▀▀▀▀ ▀▀▀▀ ▀▀ ▀▀ ▀▀ ▀▀
#
# Values filled in by option parsing; they start out empty.
DISK=""
WARNING=""
CRITICAL=""

# Exit statuses used by this plug-in.
E_OK=0 E_WARNING=1 E_CRITICAL=2 E_UNKNOWN=3

# Output switches, set to 1 by -b (brief) and -s (silent).
BRIEF=0 SILENT=0
#######################################
# Print the usage text of the plug-in.
# The READ/WRITE thresholds are documented in bytes per second because that
# is the unit the script actually compares them against
# (BYTES_READ_PER_SEC / BYTES_WRITTEN_PER_SEC).
# Globals:   none
# Arguments: none
# Outputs:   usage text on stdout ($0 is used as the program name)
#######################################
show_help() {
  # One argument per output line; "" produces an empty line.
  printf '%s\n' \
    "" \
    "$0 -d DEVICE [ -w tps,read,write -c tps,read,write ] " \
    " | [ -W qlen -C qlen ] | -h" \
    "" \
    "This plug-in is used to be alerted when maximum hard drive io/s, bytes" \
    "read|write/s or average queue length is reached." \
    "" \
    " -d DEVICE DEVICE must be without /dev (ex: -d sda)." \
    " To specify a LVM logical volume use:" \
    " volgroup/logvol." \
    " To specify symlink from /dev/disk/ use full path, ex:" \
    " /dev/disk/by-id/scsi-35000c50035006fb3" \
    " -w/c TPS,READ,WRITE TPS means transfer per seconds (aka IO/s)" \
    " READ and WRITE are in bytes per second" \
    " -W/C NUM Use average queue length thresholds instead.." \
    " -b Brief output." \
    " -s silent output: no warnings or criticals are issued" \
    "" \
    "Performance data for graphing is supplied for tps, read, write, avgrq-sz," \
    "avgqu-sz and await (see iostat man page for details)." \
    "" \
    "Example: Tps, read and write thresholds:" \
    " $0 -d sda -w 200,100000,100000 -c 300,200000,200000" \
    "" \
    "Example: Average queue length threshold:" \
    " $0 -d sda -W 50 -C 100" \
    ""
}
# process args
#######################################
# Parse the command-line options into the script's global variables.
# Globals (written): BRIEF, SILENT, ORIGDISK, DISK, WARNING, CRITICAL,
#                    WARN_QSZ, CRIT_QSZ
# Globals (read):    E_UNKNOWN
# Arguments: the script's positional parameters
# Outputs:   an error message on stdout for bad usage, the help text for -h
# Exits:     $E_UNKNOWN for -h, an unknown option or a missing option value
#######################################
parse_args() {
  while [ "$#" -gt 0 ]; do
    case "$1" in
      -b) BRIEF=1 ;;
      -s) SILENT=1 ;;
      -d | -w | -c | -W | -C)
        if [ "$#" -lt 2 ]; then
          echo "Option $1 requires an argument"
          exit "$E_UNKNOWN"
        fi
        case "$1" in
          -d)
            ORIGDISK=$2
            # "/" cannot appear in the history file name, so map it to "!".
            DISK=${2//\//!}
            ;;
          -w) WARNING=$2 ;;
          -c) CRITICAL=$2 ;;
          -W) WARN_QSZ=$2 ;;
          -C) CRIT_QSZ=$2 ;;
        esac
        shift
        ;;
      -h)
        show_help
        # Exit 1 would be reported as WARNING; usage output is UNKNOWN.
        exit "$E_UNKNOWN"
        ;;
      "")
        # Skip empty arguments instead of silently ending the parse there.
        ;;
      *)
        echo "Unknown option: $1"
        exit "$E_UNKNOWN"
        ;;
    esac
    shift
  done
}
parse_args "$@"
# Build the history file path from the invoking user's name and the
# (slash-free) device name.
HISTFILE="/var/tmp/check_diskstat_$(id -nu).${DISK}"
# check input parameters so we can continue
#######################################
# Validate the parsed options and stop with UNKNOWN on the first problem.
# Without -W the tps/read/write triples are required; with -W the queue
# length thresholds are required. Every threshold must be a non-negative
# integer, because the later comparisons are integer tests that would
# otherwise fail and let the check report OK.
# Globals (read): DISK, WARNING, CRITICAL, WARN_QSZ, CRIT_QSZ,
#                 WARN_TPS, WARN_READ, WARN_WRITE,
#                 CRIT_TPS, CRIT_READ, CRIT_WRITE, E_UNKNOWN
# Arguments: none
# Outputs:   the reason for the failure on stdout
# Returns:   0 when all values are usable; otherwise exits with $E_UNKNOWN
#######################################
sanitize() {
  local name
  local -a required

  # check device name
  if [[ -z "$DISK" ]]; then
    echo "Need device name, ex: sda"
    exit "$E_UNKNOWN"
  fi

  if [[ -z "$WARN_QSZ" ]]; then
    # check thresholds
    if [[ -z "$WARNING" ]]; then
      echo "Need warning threshold"
      exit "$E_UNKNOWN"
    fi
    if [[ -z "$CRITICAL" ]]; then
      echo "Need critical threshold"
      exit "$E_UNKNOWN"
    fi
    if [[ -z "$WARN_TPS" || -z "$WARN_READ" || -z "$WARN_WRITE" ]]; then
      echo "Need 3 values for warning threshold (tps,read,write)"
      exit "$E_UNKNOWN"
    fi
    if [[ -z "$CRIT_TPS" || -z "$CRIT_READ" || -z "$CRIT_WRITE" ]]; then
      echo "Need 3 values for critical threshold (tps,read,write)"
      exit "$E_UNKNOWN"
    fi
    required=(WARN_TPS WARN_READ WARN_WRITE CRIT_TPS CRIT_READ CRIT_WRITE)
  else
    if [[ -z "$CRIT_QSZ" ]]; then
      echo "Need '-C' option."
      exit "$E_UNKNOWN"
    fi
    required=(WARN_QSZ CRIT_QSZ)
  fi

  # ${!name} reads the variable whose name is stored in $name.
  for name in "${required[@]}"; do
    if [[ ! "${!name}" =~ ^[0-9]+$ ]]; then
      echo "Threshold $name must be a non-negative integer (got '${!name}')"
      exit "$E_UNKNOWN"
    fi
  done
  return 0
}
#######################################
# Print the I/O statistics line of a block device.
# Globals (read): E_UNKNOWN
# Arguments: $1 - device name as it appears under /sys/block (ex: sda)
# Outputs:   the content of /sys/block/$1/stat on stdout
# Returns:   0 on success, $E_UNKNOWN when the stat file is missing or
#            cannot be read (the caller only tests for $E_UNKNOWN)
#######################################
readdiskstat() {
  local stat_file="/sys/block/$1/stat"
  if [ ! -f "$stat_file" ]; then
    return "$E_UNKNOWN"
  fi
  cat -- "$stat_file" || return "$E_UNKNOWN"
}
#######################################
# Print the statistics line saved by the previous run.
# Globals (read): HISTFILE
# Arguments: none
# Outputs:   the content of $HISTFILE on stdout
# Returns:   non-zero when $HISTFILE is not a regular file or cannot be read
#######################################
readhistdiskstat() {
  # Quoted so that a path containing whitespace is still one argument.
  [ -f "$HISTFILE" ] && cat -- "$HISTFILE"
}
# process thresholds
#######################################
# Split a "tps,read,write" string into <PREFIX>_TPS, <PREFIX>_READ and
# <PREFIX>_WRITE. Fields that are not present are left empty, so a string
# with fewer than three values can be detected afterwards; anything after
# the third field is ignored.
# Arguments: $1 - threshold string (ex: 200,100000,100000)
#            $2 - variable name prefix (WARN or CRIT)
# Globals (written): ${2}_TPS, ${2}_READ, ${2}_WRITE
#######################################
split_thresholds() {
  local tps rd wr extra
  # Drop whitespace first so "1, 2, 3" yields clean numbers.
  IFS=, read -r tps rd wr extra <<<"${1//[[:space:]]/}"
  printf -v "${2}_TPS" '%s' "$tps"
  printf -v "${2}_READ" '%s' "$rd"
  printf -v "${2}_WRITE" '%s' "$wr"
}
# The triples are only needed when no queue length threshold was given.
if [ -z "$WARN_QSZ" ]; then
  split_thresholds "$WARNING" WARN
  split_thresholds "$CRITICAL" CRIT
fi
# Validate the options before touching the device.
sanitize
# Map the requested device to a name that exists under /sys/block.
if [ ! -e "/sys/block/$DISK/stat" ]; then
  # The device does not exist under the name given on the command line.
  if [[ $ORIGDISK == */* && -b /dev/$ORIGDISK ]]; then
    # volgroup/logvol form: the minor device no. maps to /dev/dm-N.
    # stat prints the minor number in hex (%T); test stat itself rather
    # than the status of a later command.
    if ! MINOR_HEX=$(stat -L --printf='%T' "/dev/$ORIGDISK") ||
      [[ ! $MINOR_HEX =~ ^[0-9a-fA-F]+$ ]]; then
      echo "Could not stat '/dev/$ORIGDISK', check your /sys filesystem for $DISK"
      exit "$E_UNKNOWN"
    fi
    MINOR=$((16#$MINOR_HEX)) # translate hex output to decimal
    DISK="dm-$MINOR"
  elif [[ -L $ORIGDISK ]]; then
    # Symlink to device name: use the last path component of its target.
    SNAME=$(readlink "$ORIGDISK")
    DISK=$(basename "$SNAME")
  else
    echo "Could not find disk stats, check your /sys filesystem for $DISK"
    exit "$E_UNKNOWN"
  fi
fi
# Take the current sample; an empty result is as unusable as a failure.
if ! NEWDISKSTAT=$(readdiskstat "$DISK") || [ -z "$NEWDISKSTAT" ]; then
  echo "Cannot read disk stats, check your /sys filesystem for $DISK"
  exit "$E_UNKNOWN"
fi

# First run for this user/device: store the sample and stop, since there is
# nothing to compare against yet.
if [ ! -f "$HISTFILE" ]; then
  if ! printf '%s\n' "$NEWDISKSTAT" >"$HISTFILE"; then
    echo "Cannot write histfile $HISTFILE..."
    exit "$E_UNKNOWN"
  fi
  echo "UNKNOWN - Initial buffer creation..."
  exit "$E_UNKNOWN"
fi

if ! OLDDISKSTAT=$(readhistdiskstat); then
  echo "Cannot read histfile $HISTFILE..."
  exit "$E_UNKNOWN"
fi

# The history file's modification time is the time of the previous sample.
# %Y prints it as seconds since the epoch, which avoids parsing the
# human-readable (and locale-dependent) "Modify:" line.
if ! OLDDISKSTAT_EPOCH=$(stat -c %Y -- "$HISTFILE"); then
  echo "Cannot read histfile $HISTFILE..."
  exit "$E_UNKNOWN"
fi
NEWDISKSTAT_EPOCH=$(date +%s)

# Store the current sample for the next run.
if ! printf '%s\n' "$NEWDISKSTAT" >"$HISTFILE"; then
  echo "Cannot write histfile $HISTFILE..."
  exit "$E_UNKNOWN"
fi
# now we have old and current stat;
# let's compare them
#######################################
# Derive per-second rates and averages from the previous and current
# statistics lines.
# Globals (read):    OLDDISKSTAT, NEWDISKSTAT, OLDDISKSTAT_EPOCH,
#                    NEWDISKSTAT_EPOCH
# Globals (written): SECTORBYTESIZE, SECTORS_READ, SECTORS_WRITE, TIME,
#                    BYTES_READ_PER_SEC, BYTES_WRITTEN_PER_SEC,
#                    KBYTES_READ_PER_SEC, KBYTES_WRITTEN_PER_SEC, TPS,
#                    READ_TICKS, WRITE_TICKS, NR_IOS, TIMEINQ,
#                    AQUSZ, AWAIT, ARQSZ
# Arguments: none
# All results are integers (shell arithmetic truncates).
#######################################
compute_disk_metrics() {
  local -a old new
  local i
  read -r -a old <<<"$OLDDISKSTAT"
  read -r -a new <<<"$NEWDISKSTAT"

  # Make sure all 11 columns are plain numbers. A missing or damaged old
  # value falls back to the new one, which yields a delta of zero.
  for ((i = 0; i < 11; i++)); do
    [[ ${new[i]} =~ ^[0-9]+$ ]] || new[i]=0
    [[ ${old[i]} =~ ^[0-9]+$ ]] || old[i]=${new[i]}
  done

  # Columns used (1-based, as in the stat line):
  #   1 reads, 3 sectors read, 4 read wait time (ms),
  #   5 writes, 7 sectors written, 8 write wait time (ms),
  #   11 time in queue (ms)
  local old_reads=${old[0]} new_reads=${new[0]}
  local old_rsect=${old[2]} new_rsect=${new[2]}
  local old_rwait=${old[3]} new_rwait=${new[3]}
  local old_writes=${old[4]} new_writes=${new[4]}
  local old_wsect=${old[6]} new_wsect=${new[6]}
  local old_wwait=${old[7]} new_wwait=${new[7]}
  local old_inq=${old[10]} new_inq=${new[10]}

  # kernel handles sectors by 512bytes
  SECTORBYTESIZE=512

  # fix overflowing 32bit counter (4294967296 = 2^32)
  if ((new_rsect < old_rsect)); then
    old_rsect=$((old_rsect - 4294967296))
  fi
  if ((new_wsect < old_wsect)); then
    old_wsect=$((old_wsect - 4294967296))
  fi

  SECTORS_READ=$((new_rsect - old_rsect))
  SECTORS_WRITE=$((new_wsect - old_wsect))

  # Two runs within the same second (or a clock stepping backwards) would
  # give an interval <= 0 and a division by zero below; use 1 second then.
  TIME=$((NEWDISKSTAT_EPOCH - OLDDISKSTAT_EPOCH))
  ((TIME > 0)) || TIME=1

  NR_IOS=$((new_reads - old_reads + new_writes - old_writes))
  BYTES_READ_PER_SEC=$((SECTORS_READ * SECTORBYTESIZE / TIME))
  BYTES_WRITTEN_PER_SEC=$((SECTORS_WRITE * SECTORBYTESIZE / TIME))
  TPS=$((NR_IOS / TIME))
  KBYTES_READ_PER_SEC=$((BYTES_READ_PER_SEC / 1024))
  KBYTES_WRITTEN_PER_SEC=$((BYTES_WRITTEN_PER_SEC / 1024))

  # From iostat source
  #
  # xds->await = (sdc->nr_ios - sdp->nr_ios) ?
  #   ((sdc->rd_ticks - sdp->rd_ticks) + (sdc->wr_ticks - sdp->wr_ticks)) /
  #   ((double) (sdc->nr_ios - sdp->nr_ios)) : 0.0;
  # xds->arqsz = (sdc->nr_ios - sdp->nr_ios) ?
  #   ((sdc->rd_sect - sdp->rd_sect) + (sdc->wr_sect - sdp->wr_sect)) /
  #   ((double) (sdc->nr_ios - sdp->nr_ios)) : 0.0;
  #
  # iostat 'avgrq-sz' = arqsz
  READ_TICKS=$((new_rwait - old_rwait))  #ms
  WRITE_TICKS=$((new_wwait - old_wwait)) #ms
  TIMEINQ=$((new_inq - old_inq))         #ms

  AQUSZ=$(((TIMEINQ / TIME) / 1000))
  if ((NR_IOS != 0)); then
    AWAIT=$(((READ_TICKS + WRITE_TICKS) / NR_IOS))
    ARQSZ=$(((SECTORS_READ + SECTORS_WRITE) / NR_IOS))
  else
    AWAIT=0
    ARQSZ=0
  fi
}
compute_disk_metrics
#######################################
# Compare one value against its warning/critical pair and record the result.
# Arguments: $1 - measured value
#            $2 - warning threshold
#            $3 - critical threshold
#            $4 - label used in the alert text
# Globals (written): OUTPUT (alert text is appended), EXITCODE (only raised)
# Globals (read):    E_WARNING, E_CRITICAL
#######################################
_check_metric() {
  local value=$1 warn=$2 crit=$3 label=$4
  # Nothing to report unless the warning threshold is exceeded.
  [ "$value" -gt "$warn" ] || return 0
  if [ "$value" -gt "$crit" ]; then
    OUTPUT="${OUTPUT}critical $label (>$crit), "
    EXITCODE=$E_CRITICAL
  else
    OUTPUT="${OUTPUT}warning $label (>$warn), "
    # Never lower a critical state set by an earlier metric.
    if [ "$EXITCODE" -lt "$E_CRITICAL" ]; then
      EXITCODE=$E_WARNING
    fi
  fi
  return 0
}

#######################################
# Work out the alert text and exit status for the current sample.
# Without -W the tps/read/write thresholds are used; with -W only the
# average queue length is checked. Read and write are compared as bytes per
# second (BYTES_READ_PER_SEC / BYTES_WRITTEN_PER_SEC), so the alert text
# names that unit.
# Globals (read):    WARN_QSZ, CRIT_QSZ, TPS, AQUSZ, BYTES_READ_PER_SEC,
#                    BYTES_WRITTEN_PER_SEC, WARN_*/CRIT_* thresholds, E_OK
# Globals (written): OUTPUT, EXITCODE
#######################################
evaluate_thresholds() {
  OUTPUT=""
  EXITCODE=$E_OK
  if [ -z "$WARN_QSZ" ]; then
    _check_metric "$TPS" "$WARN_TPS" "$CRIT_TPS" "IO/s"
    _check_metric "$BYTES_READ_PER_SEC" "$WARN_READ" "$CRIT_READ" "read bytes/s"
    _check_metric "$BYTES_WRITTEN_PER_SEC" "$WARN_WRITE" "$CRIT_WRITE" "write bytes/s"
  else
    _check_metric "$AQUSZ" "$WARN_QSZ" "$CRIT_QSZ" "queue size"
  fi
}
evaluate_thresholds
# Performance data is the same for both output formats, so build it once.
PERFDATA="tps=${TPS}io/s;;; read=${BYTES_READ_PER_SEC}b/s;;; write=${BYTES_WRITTEN_PER_SEC}b/s;;; avgrq-sz=${ARQSZ};;; avgqu-sz=${AQUSZ};$WARN_QSZ;$CRIT_QSZ; await=${AWAIT}ms;;;"

# -b selects the short status line; the default one also carries the alert
# text, the sector counts and the sampling interval.
if ((BRIEF == 0)); then
  echo "${OUTPUT}summary: $TPS io/s, read $SECTORS_READ sectors (${KBYTES_READ_PER_SEC}kB/s), write $SECTORS_WRITE sectors (${KBYTES_WRITTEN_PER_SEC}kB/s), queue size $AQUSZ in $TIME seconds | ${PERFDATA}"
else
  echo "$TPS io/s, read ${KBYTES_READ_PER_SEC}kB/s, write ${KBYTES_WRITTEN_PER_SEC}kB/s, ave. queue size ${AQUSZ} | ${PERFDATA}"
fi

# -s: still print the data, but always report OK.
if ((SILENT == 1)); then
  EXITCODE=$E_OK
fi
exit $EXITCODE